## trying the r package!library(vegabrite)library(tidyverse)
Warning: package 'purrr' was built under R version 4.3.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
The data for this visualization come from FAOSTAT, published by the Food and Agriculture Organization of the United Nations (FAO).
## reading in the CSV
## Column types are pinned to the specification readr reported for this
## export (4 character columns, 2 double columns), so the result no longer
## depends on type guessing and the column-spec message is not printed.
## NOTE(review): readr reported parsing issues for this file; inspect
## problems(mango_data) -- presumably non-data rows in the export, TODO confirm.
mango_data <- read_csv(
  "data/UNdata_Export_20240311_114817217.csv",
  col_types = cols(
    `Country or Area` = col_character(),
    Element = col_character(),
    Year = col_double(),
    Unit = col_character(),
    Value = col_double(),
    `Value Footnotes` = col_character()
  )
)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 32562 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Country or Area, Element, Unit, Value Footnotes
dbl (2): Year, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## tidying data in r
## Reshape from long to wide: keep the area, element, year and value columns,
## drop incomplete rows, spread each Element into its own column, give the two
## index columns short names, and keep only rows with a positive harvested area.
mango_data_wide <- mango_data %>%
  select(`Country or Area`, Element, Year, Value) %>%
  drop_na() %>%
  pivot_wider(names_from = Element, values_from = Value) %>%
  rename(
    production_index_per_capita =
      `Gross per capita Production Index Number (2014-2016 = 100)`,
    production_index = `Gross Production Index Number (2014-2016 = 100)`
  ) %>%
  filter(`Area harvested` > 0)

## Country-level table: remove every area named in the exclusion list below
## (world/continent/region aggregates, economic groupings, and a few
## individual areas).
mango_data_wide_country <- mango_data_wide %>%
  rename(Country = `Country or Area`) %>%
  filter(
    !(Country %in% c(
      "World",
      "Asia",
      "Africa",
      "Americas",
      "Australia and New Zealand",
      "Central America",
      "Other non-specified areas",
      "China, mainland",
      "Land Locked Developing Countries",
      "Least Developed Countries",
      "Low Income Food Deficit Countries",
      "Net Food Importing Developing Countries",
      "Northern Africa",
      "Northern America",
      "Oceania",
      "Polynesia",
      "Puerto Rico",
      "Réunion",
      "Small Island Developing States",
      "South America",
      "South-eastern Asia",
      "Southern Africa",
      "Southern Asia",
      "Western Africa",
      "Western Asia",
      "Caribbean",
      "Eastern Asia",
      "Eastern Africa"
    ))
  )

## Region-level table.
mango_data_wide_region <- mango_data_wide %>%
  rename(Region = `Country or Area`) %>%
  filter(
    Region %in% c(
      "Central America",
      "South America",
      "South-eastern Asia",
      "Southern Asia",
      "Western Africa",
      "Eastern Asia",
      "Eastern Africa",
      "Southern Africa",
      "Northern Africa",
      "Northern America"
    )
  ) ## only regions where mangoes are grown

mango_data_wide_region
“When making scatter plots or time series, we are often more interested in the overarching trend of the data than in the specific detail of where each individual data point lies.”
This led me to connect the points in my time-series and scatterplot data, rather than just presenting the individual points. Wilke is also a fan of “smoothing” to show the “big picture,” so I will try that as well.
## making my mango area chart
## Upper panel: area chart of Production by Year, filled by Region.
## The x axis title and labels are suppressed on this panel.
mango_area <- vl_chart() %>%
  vl_encode_y("Production", type = "quantitative") %>%
  vl_add_data(mango_data_wide_region) %>%
  vl_mark_area(fillOpacity = 0.7) %>%
  vl_encode_x("Year", type = "ordinal") %>%
  vl_axis_x(title = "", labels = FALSE) %>%
  vl_encode_fill("Region:N") %>%
  vl_encode_tooltip("Region") %>%
  vl_add_properties(
    title = "South Asia Historically Dominates Mango Production | FAOSTAT",
    width = 570,
    height = 300
  )

## Lower panel: bar chart of Area harvested by Year, filled by Region,
## drawn at the same width as the area chart.
mango_bar <- vl_chart() %>%
  vl_add_data(mango_data_wide_region) %>%
  vl_mark_bar() %>%
  vl_encode_x("Year", type = "ordinal") %>%
  vl_encode_y("Area harvested", type = "quantitative") %>%
  vl_encode_fill("Region:N") %>%
  vl_encode_tooltip("Region") %>%
  vl_add_properties(
    width = 570,
    height = 80
  )

## Stack the two panels vertically and display the result.
vl_vconcat(mango_area, mango_bar)
I also started messing around with some of the regression features after starting with the visualization. There is definitely a better way to code this…
## Shared builders for the two Green Revolution panels. Both panels use the
## same layers and differ only in the year filter, so the pipeline is written
## once and called twice.

## Scatter layer: light gray, low-opacity points of Production against
## Area harvested on log-log axes, with a Country tooltip and no gridlines.
##   data        - data frame passed to vl_add_data()
##   year_filter - filter expression string passed to vl_filter()
mango_scatter_layer <- function(data, year_filter) {
  vl_chart() %>%
    vl_add_data(data) %>%
    vl_filter(year_filter) %>%
    vl_mark_point() %>%
    ## encode x
    vl_encode_x("Area harvested", type = "quantitative") %>%
    vl_scale_x(type = "log") %>%
    ## encode y
    vl_encode_y("Production", type = "quantitative") %>%
    vl_scale_y(type = "log", domainMax = 100000000) %>%
    ## set default color
    vl_encode_color(value = "lightgray") %>%
    vl_encode_tooltip("Country") %>%
    vl_encode_opacity(value = 0.2) %>%
    ## removing gridlines
    vl_axis_x(grid = FALSE) %>%
    vl_axis_y(grid = FALSE)
}

## Regression layer: power-law ("pow") regression of Production on
## Area harvested for the same filtered rows, drawn as a firebrick line.
##   data        - data frame passed to vl_add_data()
##   year_filter - filter expression string passed to vl_filter()
mango_regression_layer <- function(data, year_filter) {
  vl_chart() %>%
    vl_add_data(data) %>%
    vl_filter(year_filter) %>%
    vl_regression(
      regression = "Production",
      on = "Area harvested",
      method = "pow"
    ) %>%
    vl_encode_y("Production", type = "quantitative") %>%
    vl_encode_x("Area harvested", type = "quantitative") %>%
    vl_mark_line(color = "firebrick")
}

## Layer a scatter chart and a regression chart into one 200 x 200 panel.
mango_trend_panel <- function(scatter, regression) {
  vl_layer(scatter, regression) %>%
    vl_add_properties(height = 200, width = 200)
}

## DURING THE GREEN REVOLUTION ##
rev_scatterplot <- mango_scatter_layer(
  mango_data_wide_country,
  "datum.Year < 1986"
)
rev_regression <- mango_regression_layer(
  mango_data_wide_country,
  "datum.Year < 1986"
)
green_revolution <- mango_trend_panel(rev_scatterplot, rev_regression)

## AFTER THE GREEN REVOLUTION ##
## The filter is `>= 1986` rather than `> 1986`: paired with `< 1986` above,
## a strict `>` left the year 1986 out of both panels.
## NOTE(review): assumes 1986 belongs in the later period -- confirm.
postrev_scatterplot <- mango_scatter_layer(
  mango_data_wide_country,
  "datum.Year >= 1986"
)
postrev_regression <- mango_regression_layer(
  mango_data_wide_country,
  "datum.Year >= 1986"
)
post_green_revolution <- mango_trend_panel(
  postrev_scatterplot,
  postrev_regression
)

## Show the two periods side by side.
vl_hconcat(green_revolution, post_green_revolution)